---
title: Anomaly Detection using Facebook Prophet
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/prophet.ipynb"
---
import pandas as pd
from pathlib import Path
import optuna
import numpy as np
import plotly.express as px
import re
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot, plot_cross_validation_metric
from fbprophet.diagnostics import cross_validation, performance_metrics
from pathlib import Path
from pandas.api.types import is_numeric_dtype
import plotly.graph_objs as go
# Load the raw series and aggregate it to hourly means so later steps work on
# a regular 60-minute grid.
df = pd.read_csv('../data/data.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = (
    df.set_index('timestamp')
    .resample('60min')
    .mean()
    .reset_index()
)

# Inspection window: 2014-05-31 00:00 through 2014-06-20 00:00, both inclusive.
end_date = '2014-06-20 00:00:00'
mask = (df.timestamp <= end_date)
mask2 = (df.timestamp >= '2014-05-31 00:00:00')
# Apply both conditions to `df` directly. Filtering with `mask`, resetting the
# index and then indexing with `mask2` (which is labelled by `df`'s index)
# only works when pandas can silently realign the two indexes.
df_temp = df.loc[mask & mask2]
df_temp  # notebook-style display; no effect when run as a script

# Normalise to a fresh RangeIndex without materialising an 'index' column
# that would immediately have to be dropped again.
df = df.reset_index(drop=True)
df  # notebook-style display; no effect when run as a script
def train_baseline_prophet_model(df_red):
    """Fit a Prophet model built with default settings and return it.

    Args:
        df_red: Training frame handed unchanged to ``Prophet.fit``. The call
            site in this file passes a frame with ``ds`` and ``y`` columns.

    Returns:
        The ``Prophet`` instance after ``fit`` has been called on it.
    """
    baseline = Prophet()
    baseline.fit(df_red)
    return baseline
def prophet_objective(trial, df_past, horizon):
    """Optuna objective: cross-validated MAPE of one sampled Prophet config.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        df_past: Training frame passed to ``Prophet.fit``.
        horizon: Forecast horizon forwarded to ``cross_validation``
            (the caller in this file passes a string such as ``'10day'``).

    Returns:
        The mean of the ``mape`` column of the performance-metrics table.
    """
    seasonality_mode = trial.suggest_categorical(
        'seasonality_mode', ["additive", "multiplicative"]
    )
    # suggest_float replaces the deprecated suggest_uniform and matches the
    # other float parameters sampled below.
    changepoint_prior_scale = trial.suggest_float(
        'changepoint_prior_scale', 0.001, 0.5
    )
    changepoint_range = trial.suggest_float('changepoint_range', 0.50, 0.85)
    # Sampled on a log scale: the range spans three orders of magnitude.
    seasonality_prior_scale = trial.suggest_float(
        'seasonality_prior_scale', 0.01, 10, log=True
    )
    growth = trial.suggest_categorical('growth', ["linear", "flat"])

    m = Prophet(
        growth=growth,
        seasonality_mode=seasonality_mode,
        changepoint_prior_scale=changepoint_prior_scale,
        changepoint_range=changepoint_range,
        seasonality_prior_scale=seasonality_prior_scale,
        # Only point forecasts are scored here, so interval sampling is off.
        uncertainty_samples=0,
    )
    m.fit(df_past)

    # Only `horizon` is supplied; the initial training span and the spacing
    # between cutoffs are left to cross_validation's defaults.
    df_cv = cross_validation(m, horizon=horizon)
    df_p = performance_metrics(df_cv, rolling_window=1)
    print(df_p)

    # NOTE(review): assumes performance_metrics emitted a 'mape' column --
    # confirm this holds when the series contains values at or near zero.
    mape = np.mean(df_p['mape'])
    return mape
def train_optimal_prophet_model(
    df: pd.DataFrame,
    horizon: str = '10day',
    n_trials: int = 30,
) -> dict:
    """Search Prophet hyperparameters with Optuna and return the best set.

    Args:
        df: Training frame handed to ``prophet_objective`` on every trial.
        horizon: Cross-validation horizon forwarded to ``prophet_objective``.
            Defaults to ``'10day'``, the value previously hard-coded here.
        n_trials: Number of Optuna trials to run. Defaults to ``30``, the
            value previously hard-coded here.

    Returns:
        ``study.best_params``: the sampled parameters of the trial with the
        lowest objective value.
    """
    # The objective returns an error metric, so lower is better.
    study = optuna.create_study(direction='minimize')
    study.optimize(
        lambda trial: prophet_objective(trial, df, horizon),
        n_trials=n_trials,
    )
    return study.best_params
# Rename the two columns in place to the ds/y names that the rest of this
# script reads.
df.columns = ['ds', 'y']

# Rows up to and including the cutoff form the training set; everything
# after it is held out for testing.
end_date = '2014-06-25 00:00:00'
mask = (df['ds'] <= end_date)

# reset_index() without drop=True keeps the previous row labels as an extra
# 'index' column in both frames.
df_train = df[mask].reset_index()
df_test = df[~mask].reset_index()
df_test  # notebook-style display; no effect when run as a script
# Baseline: default Prophet fitted on the training window, then asked to
# predict the timestamps of the held-out window.
m = train_baseline_prophet_model(df_train)
preds = m.predict(df_test)
m.plot(preds)

fig = go.Figure()
# Observed values (test and training) alongside the point forecast.
fig.add_trace(go.Scatter(x=df_test['ds'], y=df_test['y'], name='Actual'))
fig.add_trace(go.Scatter(x=preds['ds'], y=preds['yhat'], name='Prediction'))
fig.add_trace(go.Scatter(x=df_train['ds'], y=df_train['y'], name='Training Actuals'))
# Forecast interval: draw the lower bound unfilled, then fill from the upper
# bound down to it ('tonexty'). Filling both bounds with 'tozeroy' shades the
# area between y=0 and each line rather than the band between the bounds.
fig.add_trace(go.Scatter(
    x=preds['ds'], y=preds['yhat_lower'], name='yhat_lower', mode='lines',
))
fig.add_trace(go.Scatter(
    x=preds['ds'], y=preds['yhat_upper'], name='yhat_upper',
    fill='tonexty', mode='lines',
))
fig.update_layout(
    title="model prediction vs actual",
    xaxis_title="Timestamps",
    yaxis_title="value",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="RebeccaPurple",
    ),
)
fig.show()

# Cross-validate the baseline with a 5-day horizon and summarise the metrics
# with rolling_window=1.
df_cv = cross_validation(m, horizon='5day')
df_p = performance_metrics(df_cv, rolling_window=1)
df_p  # notebook-style display; no effect when run as a script
# Tuned model: run the hyperparameter search on the training window, then
# refit a Prophet model with the winning parameters.
best_params = train_optimal_prophet_model(df_train)
best_params  # notebook-style display; no effect when run as a script

m = Prophet(
    growth=best_params['growth'],
    seasonality_mode=best_params['seasonality_mode'],
    changepoint_prior_scale=best_params['changepoint_prior_scale'],
    changepoint_range=best_params['changepoint_range'],
    seasonality_prior_scale=best_params['seasonality_prior_scale'],
)
m.fit(df_train)
preds = m.predict(df_test)
m.plot(preds)

fig = go.Figure()
# Observed values (test and training) alongside the point forecast.
fig.add_trace(go.Scatter(x=df_test['ds'], y=df_test['y'], name='Actual'))
fig.add_trace(go.Scatter(x=preds['ds'], y=preds['yhat'], name='Prediction'))
fig.add_trace(go.Scatter(x=df_train['ds'], y=df_train['y'], name='Training Actuals'))
# Forecast interval: draw the lower bound unfilled, then fill from the upper
# bound down to it ('tonexty'). Filling both bounds with 'tozeroy' shades the
# area between y=0 and each line rather than the band between the bounds.
fig.add_trace(go.Scatter(
    x=preds['ds'], y=preds['yhat_lower'], name='yhat_lower', mode='lines',
))
fig.add_trace(go.Scatter(
    x=preds['ds'], y=preds['yhat_upper'], name='yhat_upper',
    fill='tonexty', mode='lines',
))
fig.update_layout(
    title="model prediction vs actual",
    xaxis_title="Timestamps",
    yaxis_title="value",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="RebeccaPurple",
    ),
)
fig.show()

# Cross-validate the tuned model with a 5-day horizon. No rolling_window is
# passed here, so describe() summarises the per-horizon metric rows.
df_cv = cross_validation(m, horizon='5day')
df_p = performance_metrics(df_cv)
df_p.describe()  # notebook-style display; no effect when run as a script
m.plot(preds)
# df_temp was sliced from the frame before its columns were renamed to ds/y,
# so it still carries the original 'timestamp' column. predict() reads the
# dates from 'ds', so rename on the fly instead of passing df_temp as-is.
preds2 = m.predict(df_temp.rename(columns={'timestamp': 'ds'}))
# NOTE(review): preds2 is not read by any later line visible here, and the
# components plot below uses `preds` -- confirm which forecast was intended.
m.plot_components(preds)